R Markdown

library(prettydoc)
library(arsenal)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(here)
## here() starts at /Users/stephcopeland/Documents/GitHub/222 Final Project/last take
library(tidyr)
library(DescTools)
## 
## Attaching package: 'DescTools'
## The following objects are masked from 'package:arsenal':
## 
##     %nin%, N
library(tibble)
library(calecopal)
library(ggeffects)
library(gt)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(viridis)
## Loading required package: viridisLite
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
##       Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
##       if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
library(modelr)
## 
## Attaching package: 'modelr'
## The following object is masked from 'package:ggeffects':
## 
##     data_grid
library(gtsummary)
data1 <- read.csv("last_take.csv")
head(data1)
##       country code area_sqkm population GDP_capita        dalys
## 1 Afghanistan  AFG    652860   36296111   519.8889 2.544699e+05
## 2      Angola  AGO   1246700   29816769  4095.8101 8.006665e+05
## 3     Albania  ALB     27400    2873457  4531.0208 1.543584e+02
## 4     Andorra  AND       470      76997 38964.9045 3.942648e-01
## 5   Argentina  ARG   2736690   44044811 14613.0418 2.894639e+04
## 6     Armenia  ARM     28470    2944789  3914.5279 1.241430e+03
##   tree_cover_loss_ha
## 1           25.70585
## 2       280456.32110
## 3         1689.02503
## 4            3.42216
## 5       233919.67780
## 6           41.74732
data2 <- data1 %>% 
  mutate(prop_tree_loss = (tree_cover_loss_ha/area_sqkm)) %>% 
  mutate(prop_dalys = (dalys/population))
head(data2)
##       country code area_sqkm population GDP_capita        dalys
## 1 Afghanistan  AFG    652860   36296111   519.8889 2.544699e+05
## 2      Angola  AGO   1246700   29816769  4095.8101 8.006665e+05
## 3     Albania  ALB     27400    2873457  4531.0208 1.543584e+02
## 4     Andorra  AND       470      76997 38964.9045 3.942648e-01
## 5   Argentina  ARG   2736690   44044811 14613.0418 2.894639e+04
## 6     Armenia  ARM     28470    2944789  3914.5279 1.241430e+03
##   tree_cover_loss_ha prop_tree_loss   prop_dalys
## 1           25.70585   3.937421e-05 7.010940e-03
## 2       280456.32110   2.249589e-01 2.685289e-02
## 3         1689.02503   6.164325e-02 5.371872e-05
## 4            3.42216   7.281192e-03 5.120522e-06
## 5       233919.67780   8.547540e-02 6.572033e-04
## 6           41.74732   1.466362e-03 4.215684e-04
data3 <- data2 %>% 
  mutate(per_tree_loss = (prop_tree_loss * 100)) %>% 
  mutate(per_dalys = (prop_dalys * 100))
head(data3)
##       country code area_sqkm population GDP_capita        dalys
## 1 Afghanistan  AFG    652860   36296111   519.8889 2.544699e+05
## 2      Angola  AGO   1246700   29816769  4095.8101 8.006665e+05
## 3     Albania  ALB     27400    2873457  4531.0208 1.543584e+02
## 4     Andorra  AND       470      76997 38964.9045 3.942648e-01
## 5   Argentina  ARG   2736690   44044811 14613.0418 2.894639e+04
## 6     Armenia  ARM     28470    2944789  3914.5279 1.241430e+03
##   tree_cover_loss_ha prop_tree_loss   prop_dalys per_tree_loss    per_dalys
## 1           25.70585   3.937421e-05 7.010940e-03   0.003937421 0.7010939935
## 2       280456.32110   2.249589e-01 2.685289e-02  22.495894850 2.6852892837
## 3         1689.02503   6.164325e-02 5.371872e-05   6.164324938 0.0053718717
## 4            3.42216   7.281192e-03 5.120522e-06   0.728119233 0.0005120522
## 5       233919.67780   8.547540e-02 6.572033e-04   8.547540196 0.0657203287
## 6           41.74732   1.466362e-03 4.215684e-04   0.146636171 0.0421568401
ggplot(data3, aes(sample = prop_dalys)) +
  geom_qq() + geom_qq_line()
## Warning: Removed 15 rows containing non-finite values (stat_qq).
## Warning: Removed 15 rows containing non-finite values (stat_qq_line).

ggplot(data3, aes(sample = dalys)) +
  geom_qq() + geom_qq_line()
## Warning: Removed 13 rows containing non-finite values (stat_qq).
## Warning: Removed 13 rows containing non-finite values (stat_qq_line).

data3 <- data3 %>% 
  mutate(logdalys = log(dalys))
ggplot(data3, aes(sample = logdalys)) +
  geom_qq() + geom_qq_line()
## Warning: Removed 13 rows containing non-finite values (stat_qq).
## Warning: Removed 13 rows containing non-finite values (stat_qq_line).

data4 <- data3 %>% 
  mutate(log_dalys = log(prop_dalys))
head(data4)
##       country code area_sqkm population GDP_capita        dalys
## 1 Afghanistan  AFG    652860   36296111   519.8889 2.544699e+05
## 2      Angola  AGO   1246700   29816769  4095.8101 8.006665e+05
## 3     Albania  ALB     27400    2873457  4531.0208 1.543584e+02
## 4     Andorra  AND       470      76997 38964.9045 3.942648e-01
## 5   Argentina  ARG   2736690   44044811 14613.0418 2.894639e+04
## 6     Armenia  ARM     28470    2944789  3914.5279 1.241430e+03
##   tree_cover_loss_ha prop_tree_loss   prop_dalys per_tree_loss    per_dalys
## 1           25.70585   3.937421e-05 7.010940e-03   0.003937421 0.7010939935
## 2       280456.32110   2.249589e-01 2.685289e-02  22.495894850 2.6852892837
## 3         1689.02503   6.164325e-02 5.371872e-05   6.164324938 0.0053718717
## 4            3.42216   7.281192e-03 5.120522e-06   0.728119233 0.0005120522
## 5       233919.67780   8.547540e-02 6.572033e-04   8.547540196 0.0657203287
## 6           41.74732   1.466362e-03 4.215684e-04   0.146636171 0.0421568401
##     logdalys  log_dalys
## 1 12.4469377  -4.960284
## 2 13.5931998  -3.617382
## 3  5.0392773  -9.831749
## 4 -0.9307325 -12.182254
## 5 10.2732009  -7.327517
## 6  7.1240192  -7.771529
ggplot(data4, aes(sample = log_dalys)) +
  geom_qq() + geom_qq_line()
## Warning: Removed 15 rows containing non-finite values (stat_qq).
## Warning: Removed 15 rows containing non-finite values (stat_qq_line).

data4 <- data4 %>% 
  mutate(log_GDP = log(GDP_capita))
mod1 <- lm(logdalys ~ prop_tree_loss, data = data4)

summary(mod1)
## 
## Call:
## lm(formula = logdalys ~ prop_tree_loss, data = data4)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.059  -2.194   0.059   2.519   7.314 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      9.1297     0.2579  35.397   <2e-16 ***
## prop_tree_loss  -0.1450     0.1040  -1.395    0.165    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.343 on 174 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.01105,    Adjusted R-squared:  0.00537 
## F-statistic: 1.945 on 1 and 174 DF,  p-value: 0.1649
ggplot(data4, aes(y = log_dalys, x = prop_tree_loss)) +
  geom_point() +
  stat_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

data4 <- data4 %>% 
  mutate(win_prop_tree = Winsorize(prop_tree_loss, minval = NULL, maxval = NULL, probs = c(0.00, 0.95), na.rm = TRUE, type = 9))
graph1 <- ggplot(data4, aes(y = log_dalys, x = win_prop_tree)) +
  geom_point(color = 'darkblue', size = 3) +
  stat_smooth(method = "lm", color = "red", fill = "#69b3a2", se = TRUE) +
  labs(x = "Proportion of Forest Loss (ha)", y = "Proportion of DALYs (log)") +
  ggtitle("Effect of Forest Loss on Disability Adjusted Life Years (DALYs) by country") +
  theme_bw() +
  theme(plot.title=element_text(hjust=0.5, vjust=0.5))
graph1
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

mod2 <- lm(logdalys ~ win_prop_tree, data = data4)

summary(mod2)
## 
## Call:
## lm(formula = logdalys ~ win_prop_tree, data = data4)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.8125 -2.1737 -0.1597  2.6571  7.4237 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     8.8773     0.3137  28.297   <2e-16 ***
## win_prop_tree   0.6133     0.6480   0.946    0.345    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.353 on 174 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.005121,   Adjusted R-squared:  -0.0005966 
## F-statistic: 0.8957 on 1 and 174 DF,  p-value: 0.3453
fire <- cal_palette(name = "fire", n = 15, type = "continuous")
superbloom <- cal_palette(name = "superbloom3", n = 15, type = "continuous")
kelp <- cal_palette(name = "kelp1", n = 15, type = "continuous")

kelp1 <- cal_palette(name = "kelp1", n = 2, type = "continuous")
graph2 <- ggplot(data4, aes(y = log_dalys, x = win_prop_tree, color = log_GDP)) +
  geom_point(size = 3) +
  stat_smooth(method = "lm", color = "red", fill = "#69b3a2", se = TRUE) +
  labs(x = "Proportion of Forest Loss (ha)", y = "Proportion of DALYs (log)") +
  ggtitle("Effect of Forest Loss on Disability Adjusted Life Years (DALYs) by Country") +
  theme_bw()

graph2 +
  guides(size = FALSE) +
  labs(colour = "GDP per Capita") +
  theme(legend.position = "bottom") +
  theme(plot.title=element_text(hjust=0.5, vjust=0.5)) +
  scale_color_gradientn(colours = kelp)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

mod3 <- lm(log_dalys ~ win_prop_tree*log_GDP, data = data4)

summary(mod3)
## 
## Call:
## lm(formula = log_dalys ~ win_prop_tree * log_GDP, data = data4)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.1073 -0.9923  0.1520  0.9996  4.7735 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            4.46699    0.94271   4.738 4.55e-06 ***
## win_prop_tree          1.55754    1.92518   0.809    0.420    
## log_GDP               -1.33315    0.10841 -12.297  < 2e-16 ***
## win_prop_tree:log_GDP -0.09874    0.22240  -0.444    0.658    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.676 on 169 degrees of freedom
##   (16 observations deleted due to missingness)
## Multiple R-squared:  0.5837, Adjusted R-squared:  0.5763 
## F-statistic: 78.99 on 3 and 169 DF,  p-value: < 2.2e-16
names(mod3)
##  [1] "coefficients"  "residuals"     "effects"       "rank"         
##  [5] "fitted.values" "assign"        "qr"            "df.residual"  
##  [9] "na.action"     "xlevels"       "call"          "terms"        
## [13] "model"
 #ggplot(data4, aes(y = log_dalys, x = log_GDP, color = win_prop_tree)) +
  #geom_point(aes(size = 0.5)) +
  #stat_smooth(method = "lm", se = FALSE)

graph3 <- ggplot(data4, aes(y = log_dalys, x = log_GDP, color = log_GDP)) +
  geom_point(size = 3) +
  stat_smooth(method = "lm", color = "blue", fill = "#69b3a2", se = TRUE) +
  labs(x = "GDP per capita", y = "Proportion of DALYs (log)") +
  ggtitle("Effect of Economic Wealth on Disability Adjusted Life Years (DALYs) by country") +
  theme_bw()

graph3 +
  guides(size = FALSE) +
  labs(colour = "GDP per Capita") +
  theme(legend.position = "bottom") +
  theme(plot.title=element_text(hjust=0.5, vjust=0.5)) +
  scale_color_gradientn(colours = kelp)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing missing values (geom_point).

graph4 <- ggplot(data4, aes(y = log_dalys, x = log_GDP, color = win_prop_tree)) +
  geom_point(size = 3) +
  stat_smooth(method = "lm", color = "blue", fill = "#69b3a2", se = TRUE) +
  labs(x = "GDP per capita", y = "Proportion of DALYs (log)") +
  ggtitle("Effect of Economic Wealth on Disability Adjusted Life Years (DALYs) by country") +
  theme_bw()

graph4 +
  guides(size = FALSE) +
  labs(colour = "Proportion of Forest Loss") +
  theme(legend.position = "bottom") +
  theme(plot.title=element_text(hjust=0.5, vjust=0.5)) +
  scale_color_gradientn(colours = superbloom)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing missing values (geom_point).

data5 <- data4 %>% 
  mutate(country_income_level = 
           case_when(GDP_capita >= 13000 ~ "high", 
                     GDP_capita >= 4000 ~ "middle", 
                     GDP_capita <= 4000 ~ "low"))
data5 %>% 
  group_by(country_income_level) %>% 
  summarize(n = n())
## # A tibble: 4 × 2
##   country_income_level     n
##   <chr>                <int>
## 1 high                    52
## 2 low                     72
## 3 middle                  51
## 4 <NA>                    14
equLOWcountries <- c("Burundi", "Benin", "Bhangladesh", "Bolivia", "Central African Republic", "Cote d'Ivoire", "Cameroon", "Democratic Republic of Congo", "Congo", "Comoros", "Ethiopia", "Ghana", "Guinea", "Gambia", "Honduras", "Haiti", "Indonesia", "Kenya", "Cambodia", "Laos", "Liberia", "Madagascar", "Mali", "Myanmar", "Mozambique",
"Mauritania", "Malawi", "Nigeria", "Nicaragua", "Phillipines", "Rwanda", "Senegal", "Sierra Leone", "El Salvador", "Eswatini", "Togo", "Tanzania", "Uganda", "Vietnam", "Zambia", "Zimbabwe")
noneqLOWcountries <- c("AFG", "ARM", "BTN", "CPV", "EGY", "ERI", "FSM", "GNB", "IND", "KGZ", "LSO", "MAR", "MDA", "MNG", "NER", "NPL", "PAK", "PNG", "SDN", "SLB", "SSD", "STP", "SYR", "TCD", "TJK", "TLS", "TUN", "UKR", "UZB", "VUT") 
data6 <- data5 %>% 
  filter(country_income_level == "low") %>% 
  filter(!(code %in% c(noneqLOWcountries)))

mod3 <- lm(log_dalys ~ win_prop_tree + log_GDP, data = data4)

mod4 <- lm(log_dalys ~ win_prop_tree * GDP_capita, data = data6)

summary(mod4)
## 
## Call:
## lm(formula = log_dalys ~ win_prop_tree * GDP_capita, data = data6)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.90404 -0.93667  0.00806  0.77030  1.99482 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              -3.2146577  0.4261939  -7.543 4.59e-09 ***
## win_prop_tree             1.0140100  0.7203759   1.408   0.1674    
## GDP_capita               -0.0005546  0.0002762  -2.008   0.0518 .  
## win_prop_tree:GDP_capita -0.0007232  0.0004452  -1.625   0.1125    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.075 on 38 degrees of freedom
## Multiple R-squared:  0.4589, Adjusted R-squared:  0.4162 
## F-statistic: 10.74 on 3 and 38 DF,  p-value: 2.989e-05
mod5 <- lm(log_dalys ~ win_prop_tree * GDP_capita, data = data6)

summary(mod5)
## 
## Call:
## lm(formula = log_dalys ~ win_prop_tree * GDP_capita, data = data6)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.90404 -0.93667  0.00806  0.77030  1.99482 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              -3.2146577  0.4261939  -7.543 4.59e-09 ***
## win_prop_tree             1.0140100  0.7203759   1.408   0.1674    
## GDP_capita               -0.0005546  0.0002762  -2.008   0.0518 .  
## win_prop_tree:GDP_capita -0.0007232  0.0004452  -1.625   0.1125    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.075 on 38 degrees of freedom
## Multiple R-squared:  0.4589, Adjusted R-squared:  0.4162 
## F-statistic: 10.74 on 3 and 38 DF,  p-value: 2.989e-05
#ggplot(data6, aes(y = log_dalys, x = win_prop_tree)) +
  #geom_point(aes(color = GDP_capita, size = 1.0)) +
  #stat_smooth(method = "lm", se = FALSE)

graph5 <- ggplot(data6, aes(y = log_dalys, x = win_prop_tree, color = GDP_capita)) +
  geom_point(size = 4) +
  stat_smooth(method = "lm", color = "red", fill = "#69b3a2", se = TRUE) +
  labs(x = "Proportion of Forest Loss (ha)", y = "Proportion of DALYs (log)") +
  ggtitle("Effect of Forest Loss on Disability Adjusted Life Years (DALYs) \n for Low Income, Tropical Nations") +
  theme_bw()

graph5 +
  guides(size = FALSE) +
  labs(colour = "GDP per Capita") +
  theme(legend.position = "bottom") +
  theme(plot.title=element_text(hjust=0.5, vjust=0.5)) +
  scale_color_gradientn(colours = kelp)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## `geom_smooth()` using formula 'y ~ x'

#ggplot(data6, aes(y = log_dalys, x = log_GDP)) +
  #geom_point(aes(color = win_prop_tree, size = 1.0)) +
  #stat_smooth(method = "lm", se = FALSE)

graph6 <- ggplot(data6, aes(y = log_dalys, x = GDP_capita, color = win_prop_tree)) +
  geom_point(size = 4) +
  stat_smooth(method = "lm", color = "red", fill = "#69b3a2", se = TRUE) +
  labs(x = "GDP per capita", y = "Proportion of DALYs (log)") +
  ggtitle("Effect of Economic Wealth on Disability Adjusted Life Years (DALYs) \n for Low Income, Tropical Nations") +
  theme_bw()

graph6 +
  guides(size = FALSE) +
  labs(colour = "Proportion of Forest Loss") +
  theme(legend.position = "bottom") +
  theme(plot.title=element_text(hjust=0.5, vjust=0.5)) +
  scale_color_gradientn(colours = superbloom)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## `geom_smooth()` using formula 'y ~ x'

mod5 <- lm(log_dalys ~ win_prop_tree * GDP_capita, data = data6)

summary(mod5)
## 
## Call:
## lm(formula = log_dalys ~ win_prop_tree * GDP_capita, data = data6)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.90404 -0.93667  0.00806  0.77030  1.99482 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              -3.2146577  0.4261939  -7.543 4.59e-09 ***
## win_prop_tree             1.0140100  0.7203759   1.408   0.1674    
## GDP_capita               -0.0005546  0.0002762  -2.008   0.0518 .  
## win_prop_tree:GDP_capita -0.0007232  0.0004452  -1.625   0.1125    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.075 on 38 degrees of freedom
## Multiple R-squared:  0.4589, Adjusted R-squared:  0.4162 
## F-statistic: 10.74 on 3 and 38 DF,  p-value: 2.989e-05
data6 %>%
  arrange(desc(population)) %>%
  mutate(country = factor(country, country)) %>%
  ggplot(aes(x=GDP_capita, y=log_dalys, size=per_tree_loss, fill= GDP_capita)) +
    stat_smooth(method = "lm", color = "black", se = FALSE) +
    geom_point(alpha=0.5, shape=21, color="black") +
    scale_size(range = c(1.4, 19), name="Population (M)") +
    scale_fill_viridis(discrete=FALSE, guide=FALSE, option="A") +
    theme_bw() +
    theme(legend.position="bottom") +
    ylab("DALYs (log)") +
    xlab("Gdp per Capita") +
    theme(legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'

p <- data6 %>%
  # Reorder countries to having big bubbles on top
  arrange(desc(per_tree_loss)) %>%
  mutate(country = factor(country, country)) %>%
  
  # prepare text for tooltip
  mutate(text = paste("Country: ", country, "\nPopulation (M): ", population, "\n% DALYs of Population: ", per_dalys, "\nGdp per capita: ", GDP_capita, "\n % Tree Loss of Country: ", per_tree_loss, sep="")) %>%
  
  # Classic ggplot
  ggplot( aes(x=GDP_capita, y=log_dalys, size = per_tree_loss, color = GDP_capita, text=text)) +
    geom_point(alpha=0.7) +
    scale_size(range = c(1.4, 19), name="% Tree Loss") +
    scale_color_viridis(discrete=FALSE, guide=FALSE, option = "A") +
    theme_bw() +
    theme(legend.position="none")

# turn ggplot interactive with plotly
pp <- ggplotly(p, tooltip="text")
pp

Hypothesis Testing and OLS violations Does deforestation contribute to the incidence and burden of neglected tropical diseases within countries?

H0: There is no effect of deforestation and NTD burden. B1 = 0 HA: There will be a positive effect between deforestation and NTD burden. A country with high forest loss will have a high NTD burden whereas a country with low forest loss will have low NTD burden B1 =/ 0

Predictions: I expect to fail to reject the null, this particularly data set is unlikely to capture the intricacies of NTD prevalence and estimation in relation to deforestation.

lm(gdp_growth ~ number + avg_strength, data = hgdp) %>% summary() %>% xtable() %>% kable(caption = “…”)

mod2 <- lm(logdalys ~ win_prop_tree, data = data4)

summary(mod2)
## 
## Call:
## lm(formula = logdalys ~ win_prop_tree, data = data4)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.8125 -2.1737 -0.1597  2.6571  7.4237 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     8.8773     0.3137  28.297   <2e-16 ***
## win_prop_tree   0.6133     0.6480   0.946    0.345    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.353 on 174 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.005121,   Adjusted R-squared:  -0.0005966 
## F-statistic: 0.8957 on 1 and 174 DF,  p-value: 0.3453

Point Estimates: B0 = 8.8773 B1 = 0.6133

Std. Error: 0.6480

t-statistic: 0.946

p-value: 0.345

R-squared: 0.005121

OLS Assumptions - unbiased and lowest variance correlation: I’m not sure if you really need this with the regression, it’s just another way of showing that this relationship is “insignificant”

0.108 (zero correlation)

  1. ‘yes’ to an extent obviously hugely variable
  2. Can not test but has likely in this case been violated (diseases are hardly ever explained by just one variable). Go more into this in part 2
  3. Yes
  4. Mean residuals (-15.84636) - not unbiased
#data4 %>% 
  #summarize(dalys_cor = cor(log_dalys, win_prop_tree, use = "complete.obs")) %>% 
#mean(predictions$residuals, na.rm = TRUE)
mod2 <- lm(logdalys ~ win_prop_tree, data = data4)

predictions <- data4 %>% add_predictions(mod2) %>%
  mutate(residuals = log_dalys-pred)
#ggplot(data=predictions) + geom_histogram(aes(residuals), bins=80) +
 # theme_bw()

ggplot(predictions, aes(residuals)) +
  geom_histogram(color = "dark blue", fill = "dark blue", bins = 80) +
  geom_density(alpha = 0.2, fill="lightblue") + 
  theme_bw()
## Warning: Removed 15 rows containing non-finite values (stat_bin).
## Warning: Removed 15 rows containing non-finite values (stat_density).

ggplot(predictions) + 
  geom_point(aes(x=win_prop_tree, y=residuals), color = "dark blue", size = 3, alpha = .5) +
  theme_bw()
## Warning: Removed 15 rows containing missing values (geom_point).

Hypothesis Testing and OLS violations - adding income to model Does deforestation affect the incidence and burden of neglected tropical diseases within countries when the model contributes for the affect of country economic wealth?

H0: There is no effect of deforestation and NTD burden. B1 = 0 HA: There will be a positive effect between deforestation and NTD burden. Low economic, tropical countries with high deforestation rates will have a high incidence of DALYs per their populations. B1 =/ 0

Predictions: I expect to fail to reject the null, even this more focused subset of the data set is unlikely to capture the intricacies of NTD prevalence and estimation in relation to deforestation.

mod3 <- lm(log_dalys ~ win_prop_tree * GDP_capita, data = data4)

summary(mod3)
## 
## Call:
## lm(formula = log_dalys ~ win_prop_tree * GDP_capita, data = data4)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.1610 -1.2681  0.0039  1.1068  5.2214 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              -5.960e+00  2.289e-01 -26.034  < 2e-16 ***
## win_prop_tree             1.094e+00  5.146e-01   2.125    0.035 *  
## GDP_capita               -7.401e-05  9.474e-06  -7.812 5.68e-13 ***
## win_prop_tree:GDP_capita -4.193e-05  2.700e-05  -1.553    0.122    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.005 on 169 degrees of freedom
##   (16 observations deleted due to missingness)
## Multiple R-squared:  0.4041, Adjusted R-squared:  0.3936 
## F-statistic: 38.21 on 3 and 169 DF,  p-value: < 2.2e-16

Point Estimates: B0 = -5.96 B1 = 1.094

Std. Error: 5.146e-01

t-statistic: 2.125

p-value: 0.035 *

R-squared: 0.3936

OLS Assumptions - unbiased and lowest variance

predictions2 <- data4 %>% add_predictions(mod3) %>%
 mutate(residuals = log_dalys-pred)

#ggplot(data=predictions2) + geom_histogram(aes(residuals), bins=80)

ggplot(predictions2, aes(residuals)) +
  geom_histogram(color = "dark blue", fill = "dark blue", bins = 80) +
  geom_density(alpha = 0.2, fill="lightblue") + 
  theme_bw()
## Warning: Removed 16 rows containing non-finite values (stat_bin).
## Warning: Removed 16 rows containing non-finite values (stat_density).

mean(predictions2$residuals, na.rm = TRUE)
## [1] -4.153504e-15
#ggplot(predictions) + geom_point(aes(x=GDP_capita, y=residuals))

ggplot(predictions2) + 
  geom_point(aes(x=win_prop_tree, y=residuals), color = "dark blue", size = 3, alpha = .5) +
  theme_bw()
## Warning: Removed 16 rows containing missing values (geom_point).

  1. ‘yes’ to an extent obviously hugely variable
  2. Can not test but has likely in this case been violated (diseases are hardly ever explained by just a couple). Go more into this…
  3. Yes
  4. Mean residuals: -4.153504e-15 (so basically zero, satisfying unbiasedness) BUT looking at geom_point graph those error terms are really tightly clumped and not homogeneous so there might be biasness there.

Hypothesis Testing and OLS Assumptions for Low Income & Tropical Countries Does deforestation affect the incidence and burden of neglected tropical diseases within countries that a geographically tropical and disadvantaged on the global economic spectrum?

H0: There is no effect of deforestation and NTD burden. B1 = 0 HA: There will be a positive effect between deforestation and NTD burden. Low economic, tropical countries with high deforestation rates will have a high incidence of DALYs per their populations. B1 =/ 0

Predictions: I expect to fail to reject the null, even this more focused subset of the data set is unlikely to capture the intricacies of NTD prevalence and estimation in relation to deforestation.

mod5 <- lm(log_dalys ~ win_prop_tree * GDP_capita, data = data6)

summary(mod5)
## 
## Call:
## lm(formula = log_dalys ~ win_prop_tree * GDP_capita, data = data6)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.90404 -0.93667  0.00806  0.77030  1.99482 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              -3.2146577  0.4261939  -7.543 4.59e-09 ***
## win_prop_tree             1.0140100  0.7203759   1.408   0.1674    
## GDP_capita               -0.0005546  0.0002762  -2.008   0.0518 .  
## win_prop_tree:GDP_capita -0.0007232  0.0004452  -1.625   0.1125    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.075 on 38 degrees of freedom
## Multiple R-squared:  0.4589, Adjusted R-squared:  0.4162 
## F-statistic: 10.74 on 3 and 38 DF,  p-value: 2.989e-05

Point Estimates: B0 = -3.2146577 B1 = 1.0140100

Std. Error: 0.7203759

t-statistic: 1.408

p-value: 0.1674

R-squared: 0.4162

OLS Assumptions - unbiased and lowest variance 1. ‘yes’ to an extent obviously hugely variable 2. Can not test but has likely in this case been violated (diseases are hardly ever explained by just a couple). Go more into this… 3. Yes 4. Mean Residuals:-4.018415e-16, this is the least unbiased of the three models

predictions3 <- data6 %>% add_predictions(mod5) %>%
  mutate(residuals = log_dalys-pred)

#ggplot(data=predictions2) + geom_histogram(aes(residuals), bins=80)

ggplot(predictions3, aes(residuals)) +
  geom_histogram(color = "dark blue", fill = "dark blue", bins = 80) +
  geom_density(alpha = 0.2, fill="lightblue") + 
  theme_bw()

mean(predictions3$residuals, na.rm = TRUE)
## [1] -4.018415e-16
#ggplot(predictions) + geom_point(aes(x=GDP_capita, y=residuals))

ggplot(predictions3) + 
  geom_point(aes(x=win_prop_tree, y=residuals), color = "dark blue", size = 3, alpha = .5) +
  theme_bw()